# Project root: every relative path used afterwards is resolved against it.
setwd("C:/Users/sijia/Desktop/current working dictionary/代谢组")

# ---- Metabolite data ----
meta <- read.csv(file = "data/phenotype/metabolity_4681.csv")
dim(meta)  # author recorded: 4681 rows, 902 columns

# Column layout as recorded by the author (not verified here):
names(meta)[seq(1, 227)]    # studyid, status, original metabolites
names(meta)[seq(228, 452)]  # log-transformed metabolites
names(meta)[seq(453, 677)]  # log/sd-transformed metabolites
names(meta)[seq(678, 902)]  # "int"-transformed metabolites
                            # (presumably inverse normal -- TODO confirm)
# Distribution of the last transformed column.
hist(meta[, 902])

# ---- Missing values ----
# NA count in the first "int"-transformed column, then rows with any NA.
table(is.na(meta[[678]]))
table(complete.cases(meta))  # author recorded 511 incomplete rows

# ---- Lifestyle factors and covariates ----
lifestyle_cov <- read.csv("data/lifestyle_covariates_4681.csv")
dim(lifestyle_cov)

# cbind() pairs rows purely by position, so stop here unless both files list
# the same participants in the same order. Previously the check was only
# printed and a mismatch would have silently mixed up participants.
stopifnot(
  nrow(meta) == nrow(lifestyle_cov),
  all(meta$studyid == lifestyle_cov$studyid)
)
# NOTE(review): columns present in both files (at least studyid) end up
# duplicated in `data`; `$` then returns the first match.
data <- cbind(meta, lifestyle_cov)

# ---- Keep controls and MI cases only ----
table(data$status_update)
# %in% never returns NA, so participants with a missing status are dropped.
# With `==` they would be kept as rows consisting entirely of NA.
MI_control <- data[data$status_update %in% c("control", "MI"), ]
table(MI_control$status_update)
# Columns used as exposures in the per-metabolite models.
colnames(MI_control)[678:902]
# Row labels for the per-metabolite result tables (225 names).
# Layout: 14 subclasses x 7 suffixes, then the same 14 subclasses x 5 "_per"
# suffixes, then 57 further names. Built inside local() so the helper
# objects do not leak into the workspace.
metabolity_name <- local({
  subclass <- c(
    "xxl_vldl", "xl_vldl", "l_vldl", "m_vldl", "s_vldl", "xs_vldl",
    "idl",
    "l_ldl", "m_ldl", "s_ldl",
    "xl_hdl", "l_hdl", "m_hdl", "s_hdl"
  )
  suffix_abs <- c("p", "l", "pl", "c", "ce", "fc", "tg")
  suffix_per <- c("pl_per", "c_per", "ce_per", "fc_per", "tg_per")

  # Subclass-major order: all suffixes of the first subclass, then all
  # suffixes of the second subclass, and so on.
  expand <- function(suffix) {
    paste0(
      "nmr_", rep(subclass, each = length(suffix)),
      "_", rep(suffix, times = length(subclass))
    )
  }

  remaining <- c(
    "nmr_vldl_d", "nmr_ldl_d", "nmr_hdl_d",
    "nmr_serum_c", "nmr_vldl_c", "nmr_remnant_c", "nmr_ldl_c", "nmr_hdl_c",
    "nmr_hdl2_c", "nmr_hdl3_c", "nmr_estc", "nmr_freec",
    "nmr_serum_tg", "nmr_vldl_tg", "nmr_ldl_tg", "nmr_hdl_tg",
    "nmr_totpg", "nmr_tg_pg", "nmr_pc", "nmr_sm", "nmr_totcho",
    "nmr_apoa1", "nmr_apob", "nmr_apob_apoa1",
    "nmr_totfa", "nmr_unsat", "nmr_dha", "nmr_la", "nmr_faw3", "nmr_faw6",
    "nmr_pufa", "nmr_mufa", "nmr_sfa",
    "nmr_dha_fa", "nmr_la_fa", "nmr_faw3_fa", "nmr_faw6_fa", "nmr_pufa_fa",
    "nmr_mufa_fa", "nmr_sfa_fa",
    "nmr_glc", "nmr_lac", "nmr_cit",
    "nmr_ala", "nmr_gln", "nmr_his", "nmr_ile", "nmr_leu", "nmr_val",
    "nmr_phe", "nmr_tyr",
    "nmr_ace", "nmr_acace", "nmr_bohbut",
    "nmr_crea", "nmr_alb", "nmr_gp"
  )

  c(expand(suffix_abs), expand(suffix_per), remaining)
})

# Inspect how each model variable was read in before any recoding.
class(MI_control$smoking_5groups)
class(MI_control$status_update)
class(MI_control$region_code)
class(MI_control$education)
class(MI_control$fasting_time)
class(MI_control$has_diabetes)
class(MI_control$bmi_calc)

# Region and education enter the models as categorical terms.
MI_control$region_code <- factor(MI_control$region_code)
MI_control$education <- factor(MI_control$education)

# Fix the outcome coding explicitly. For a factor response, binomial glm
# treats the first level as "failure" and everything else as "success".
# Left implicit, the first level depends on locale sort order (or on unused
# levels carried over from the full data set). With "control" as the
# reference, coefficients are always log-odds of MI.
MI_control$status_update <- factor(
  MI_control$status_update,
  levels = c("control", "MI")
)

# ---- Per-metabolite logistic regression (model 1) ----
# One model per metabolite: status_update regressed on the metabolite plus the
# adjustment set. Adjustment, as recorded by the author: age (continuous),
# sex, fasting time (<8 or >=8 h), region (10 regions), smoking status
# (>=25 cig/d, 1-14 cig/d, 15-24 cig/d, former, never/occasional), and
# educational attainment (no formal or primary school, middle or high school,
# technical school or college or university).

# First exposure column in MI_control. The i-th exposure column is assumed to
# correspond to metabolity_name[i] -- TODO confirm against the column names.
first_int_col <- 678
n_metabolite <- length(metabolity_name)
stopifnot(first_int_col + n_metabolite - 1 <= ncol(MI_control))

# Rows = metabolites; columns = log-odds estimate, standard error, z value,
# p-value, and the 95% confidence limits.
mi_result <- matrix(
  NA_real_,
  nrow = n_metabolite,
  ncol = 6,
  dimnames = list(
    metabolity_name,
    c("estimate", "se", "zvalue", "pr", "lci", "uci")
  )
)

for (i in seq_len(n_metabolite)) {
  mi_model <- glm(
    status_update ~ MI_control[, first_int_col + i - 1] +
      age_at_study_date + is_female + education + fasting_time +
      region_code + smoking_5groups,
    family = binomial(link = "logit"),
    data = MI_control
  )

  # Summarise once per model instead of once per extracted statistic.
  coef_table <- coef(summary(mi_model))
  # The metabolite is the first term after the intercept.
  metabolite_term <- names(coef(mi_model))[2]

  # summary() drops aliased (NA) coefficients, so look the metabolite up by
  # name. Taking row 2 blindly would then return another covariate's numbers.
  if (metabolite_term %in% rownames(coef_table)) {
    mi_result[i, 1:4] <- coef_table[metabolite_term, 1:4]
    # Profile only the metabolite coefficient, and only once. Profiling every
    # coefficient twice per model was the dominant cost of this loop.
    mi_result[i, 5:6] <- as.numeric(
      confint(mi_model, parm = 2, level = 0.95)
    )
  } else {
    warning(
      "No estimable coefficient for ", metabolity_name[i],
      "; results left as NA",
      call. = FALSE
    )
  }
}

# Benjamini-Hochberg false discovery rate across all metabolite p-values
# ("fdr" is p.adjust's alias for "BH"). The result keeps the metabolite names
# carried by the p-value column.
# n is passed explicitly on purpose: p.adjust's default counts only non-NA
# p-values, whereas the full row count treats failed models as tests too.
fdr <- p.adjust(
  mi_result[, 4],
  method = "fdr",
  n = length(mi_result[, 4])
)

# Append the adjusted p-values as column "fdr" and save to the working
# directory.
mi_result <- cbind(mi_result, fdr)
write.csv(mi_result, file = "0108_mi_model1.csv")